R Markdown

loading necessary packages

library(usmap)
library(ggplot2)
library(readr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(maps)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(dslabs)
library(stringr)
library(rstudioapi)
library(tidyverse) # ggplot2, dplyr, tidyr, readr, purrr, tibble
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble  3.0.3     ✓ purrr   0.3.4
## ✓ tidyr   1.1.2     ✓ forcats 0.5.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date()        masks base::date()
## x dplyr::filter()          masks stats::filter()
## x lubridate::intersect()   masks base::intersect()
## x dplyr::lag()             masks stats::lag()
## x purrr::map()             masks maps::map()
## x lubridate::setdiff()     masks base::setdiff()
## x lubridate::union()       masks base::union()
library(magrittr) # pipes
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
#install.packages("lintr")
library(lintr) # code linting
#install.packages("sf")
library(sf) # spatial data handling
## Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
library(raster) # raster handling (needed for relief)
## Loading required package: sp
## 
## Attaching package: 'raster'
## The following object is masked from 'package:magrittr':
## 
##     extract
## The following object is masked from 'package:tidyr':
## 
##     extract
## The following object is masked from 'package:dplyr':
## 
##     select
#install.packages("viridis")
library(viridis) # viridis color scale
## Loading required package: viridisLite
#install.packages("cowplot")
library(cowplot) # stack ggplots
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:lubridate':
## 
##     stamp
library(rmarkdown)
library(ggthemes)
## 
## Attaching package: 'ggthemes'
## The following object is masked from 'package:cowplot':
## 
##     theme_map
#install.packages("ggalt")
library(ggalt)
## Registered S3 methods overwritten by 'ggalt':
##   method                  from   
##   grid.draw.absoluteGrob  ggplot2
##   grobHeight.absoluteGrob ggplot2
##   grobWidth.absoluteGrob  ggplot2
##   grobX.absoluteGrob      ggplot2
##   grobY.absoluteGrob      ggplot2
#install.packages("biscale")
library(biscale)
library(cowplot)
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(viridis)
library(RColorBrewer)

First we made a new variable for % of population >65 years

clean_df_updated <- read_csv("clean_df_updated.csv")
## Warning: Missing column names filled in: 'X1' [1]
## 
## ── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   GEOID = col_character(),
##   NAME = col_character(),
##   State = col_character(),
##   date = col_date(format = ""),
##   county = col_character(),
##   state = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
clean_df_updated$percent_over_65 <- (clean_df_updated$total_over_65 / clean_df_updated$total_population)*100 
clean_df_updated_quantiles <- clean_df_updated

Then we made three quantiles of the variables for the bivariate map

summary(clean_df_updated_quantiles$percent_over_65)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   3.799  15.442  17.982  18.255  20.661  55.596
case_rate_quantile<- quantile(clean_df_updated_quantiles$cases_per_1000ppl,c(0.33,0.66,1), na.rm = TRUE)
poverty_rate_quantile<- quantile(clean_df_updated_quantiles$percent_below_poverty_level,c(0.33,0.66,1), na.rm = TRUE)
high_mask_usage_quantile <-quantile(clean_df_updated_quantiles$high_mask_usage_sum,c(0.33,0.66,1), na.rm = TRUE)
household_size_quantile <-quantile(clean_df_updated_quantiles$avg_household_size,c(0.33,0.66,1), na.rm = TRUE)
percent_over_65_quantile <-quantile(clean_df_updated_quantiles$percent_over_65,c(0.33,0.66,1), na.rm = TRUE)
pop_density_quantile <-quantile(clean_df_updated_quantiles$pop_density_sq_km,c(0.33,0.66,1), na.rm = TRUE)
worked_home_quantile<-quantile(clean_df_updated_quantiles$homeoffice_per_1000ppl,c(0.33,0.66,1), na.rm = TRUE)
non_white_quantile <- quantile(clean_df_updated_quantiles$non_white_proportion ,c(0.33,0.66,1), na.rm = TRUE)
mask_always_quantile <- quantile(clean_df_updated_quantiles$ALWAYS ,c(0.33,0.66,1), na.rm = TRUE)
mask_high_quantile <- quantile(clean_df_updated_quantiles$high_mask_usage_sum ,c(0.33,0.66,1), na.rm = TRUE)

Then we mutated into categorical variables 1-3 to represent the three quantiles for each variable

clean_df_updated_quantiles<- clean_df_updated_quantiles %>% mutate(
                               y= ifelse(percent_below_poverty_level<poverty_rate_quantile[1],1,ifelse(percent_below_poverty_level<poverty_rate_quantile[2],2,3)) ,
                               x= ifelse(cases_per_1000ppl<case_rate_quantile[1],1,ifelse(cases_per_1000ppl<case_rate_quantile[2],2,3)),
                               z= ifelse(high_mask_usage_sum<high_mask_usage_quantile[1],1,ifelse(high_mask_usage_sum<high_mask_usage_quantile[2],2,3)),
                               a= ifelse(avg_household_size<household_size_quantile[1],1,ifelse(avg_household_size<household_size_quantile[2],2,3)),
                               b= ifelse(percent_over_65<percent_over_65_quantile[1],1,ifelse(percent_over_65<percent_over_65_quantile[2],2,3)),
                               c= ifelse(pop_density_sq_km<pop_density_quantile[1],1,ifelse(pop_density_sq_km<pop_density_quantile[2],2,3)),
                               d= ifelse(homeoffice_per_1000ppl<worked_home_quantile[1],1,ifelse(homeoffice_per_1000ppl<worked_home_quantile[2],2,3)),
                               e= ifelse(non_white_proportion<non_white_quantile[1],1,ifelse(non_white_proportion<non_white_quantile[2],2,3)),
                               f= ifelse(ALWAYS<mask_always_quantile[1],1,ifelse(ALWAYS<mask_always_quantile[2],2,3)),
                                g=  ifelse(high_mask_usage_sum<mask_high_quantile[1],1,ifelse(high_mask_usage_sum<mask_high_quantile[2],2,3))
                               )  

Thens we transformed the indicator variables to be numeric

clean_df_updated_quantiles$x = as.numeric(clean_df_updated_quantiles$x)
clean_df_updated_quantiles$y = as.numeric(clean_df_updated_quantiles$y)
clean_df_updated_quantiles$z = as.numeric(clean_df_updated_quantiles$z)
clean_df_updated_quantiles$a = as.numeric(clean_df_updated_quantiles$a)
clean_df_updated_quantiles$b = as.numeric(clean_df_updated_quantiles$b)
clean_df_updated_quantiles$c = as.numeric(clean_df_updated_quantiles$c)
clean_df_updated_quantiles$d = as.numeric(clean_df_updated_quantiles$d)
clean_df_updated_quantiles$e = as.numeric(clean_df_updated_quantiles$e)
clean_df_updated_quantiles$f = as.numeric(clean_df_updated_quantiles$f)
clean_df_updated_quantiles$g = as.numeric(clean_df_updated_quantiles$g)

Then we made a single variable for each comparison using ifelse and based on the matrix between the two variables, created a new variable with values 1-9

#single variable for covid case rate and poverty 
clean_df_updated_quantiles$bivariate <- ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$y==1, 1, 
                                               ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$y==1, 2, 
                                                      ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$y==1, 3,
                                                             ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$y==2, 4,
                                                                    ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$y==2, 5,
                                                                           ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$y==2, 6,
                                                                                  ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$y==3, 7,
                                                                                         ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$y==3, 8,
                                                                                                ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$y==3, 9, 
                                                                                                       FALSE)))))))))

#single variable for covid case rate and mask use
clean_df_updated_quantiles$bivariate_mask <- ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$z==1, 1, 
                                                    ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$z==1, 2, 
                                                           ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$z==1, 3,
                                                                  ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$z==2, 4,
                                                                         ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$z==2, 5,
                                                                                ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$z==2, 6,
                                                                                       ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$z==3, 7,
                                                                                              ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$z==3, 8,
                                                                                                     ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$z==3, 9, 
                                                                                                            FALSE)))))))))
#single variable for covid case rate and household size 
clean_df_updated_quantiles$bivariate_household <- ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$a==1, 1, 
                                                    ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$a==1, 2, 
                                                           ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$a==1, 3,
                                                                  ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$a==2, 4,
                                                                         ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$a==2, 5,
                                                                                ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$a==2, 6,
                                                                                       ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$a==3, 7,
                                                                                              ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$a==3, 8,
                                                                                                     ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$a==3, 9, 
                                                                                                            FALSE)))))))))

#single variable for covid case rate and percent over 65 
clean_df_updated_quantiles$bivariate_over_65 <- ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$b==1, 1, 
                                                         ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$b==1, 2, 
                                                                ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$b==1, 3,
                                                                       ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$b==2, 4,
                                                                              ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$b==2, 5,
                                                                                     ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$b==2, 6,
                                                                                            ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$b==3, 7,
                                                                                                   ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$b==3, 8,
                                                                                                          ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$b==3, 9, 
                                                                                                                 FALSE)))))))))

#single variable for covid case rate and population density 
clean_df_updated_quantiles$bivariate_pop_density <- ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$c==1, 1, 
                                                       ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$c==1, 2, 
                                                              ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$c==1, 3,
                                                                     ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$c==2, 4,
                                                                            ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$c==2, 5,
                                                                                   ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$c==2, 6,
                                                                                          ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$c==3, 7,
                                                                                                 ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$c==3, 8,
                                                                                                        ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$c==3, 9, 
                                                                                                               FALSE)))))))))

#single variable for covid case rate and worked from home 
clean_df_updated_quantiles$bivariate_worked_home <- ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$d==1, 1, 
                                                           ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$d==1, 2, 
                                                                  ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$d==1, 3,
                                                                         ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$d==2, 4,
                                                                                ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$d==2, 5,
                                                                                       ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$d==2, 6,
                                                                                              ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$d==3, 7,
                                                                                                     ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$d==3, 8,
                                                                                                            ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$d==3, 9, 
                                                                                                                   FALSE)))))))))
#single variable for covid case rate and proportion non white 
clean_df_updated_quantiles$bivariate_non_white <- ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$e==1, 1, 
                                                           ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$e==1, 2, 
                                                                  ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$e==1, 3,
                                                                         ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$e==2, 4,
                                                                                ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$e==2, 5,
                                                                                       ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$e==2, 6,
                                                                                              ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$e==3, 7,
                                                                                                     ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$e==3, 8,
                                                                                                            ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$e==3, 9, 
                                                                                                                   FALSE)))))))))
#single variable for covid case rate always mask users 
clean_df_updated_quantiles$bivariate_mask_always <- ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$f==1, 1, 
                                                           ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$f==1, 2, 
                                                                  ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$f==1, 3,
                                                                         ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$f==2, 4,
                                                                                ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$f==2, 5,
                                                                                       ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$f==2, 6,
                                                                                              ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$f==3, 7,
                                                                                                     ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$f==3, 8,
                                                                                                            ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$f==3, 9, 
                                                                                                                   FALSE)))))))))
#single variable for covid case rate and always + frequntly mask users 
clean_df_updated_quantiles$bivariate_mask_mostly <- ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$g==1, 1, 
                                                           ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$g==1, 2, 
                                                                  ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$g==1, 3,
                                                                         ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$g==2, 4,
                                                                                ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$g==2, 5,
                                                                                       ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$g==2, 6,
                                                                                              ifelse(clean_df_updated_quantiles$x==1 & clean_df_updated_quantiles$g==3, 7,
                                                                                                     ifelse(clean_df_updated_quantiles$x==2 & clean_df_updated_quantiles$g==3, 8,
                                                                                                            ifelse(clean_df_updated_quantiles$x==3 & clean_df_updated_quantiles$g==3, 9, 
                                                                                                                   FALSE)))))))))

Then we loaded US county map data and plotted base map to make sure it looked right. We then removed lowercases and renamed and merged with our updated data

AllCounty <- map_data("county")
AllCounty %>% ggplot(aes(x = long, y = lat, group = group)) +
  geom_polygon(color = "red", fill = NA, size = .1 )

clean_df_updated_quantiles$county = tolower(clean_df_updated_quantiles$county)
AllCounty <- AllCounty %>% rename("county" = "subregion")


AllCounty = left_join(AllCounty, clean_df_updated_quantiles, by= "county")

Making all bivariate variables factor variables

AllCounty$bivariate <- as.factor(AllCounty$bivariate)
AllCounty$bivariate_mask <- as.factor(AllCounty$bivariate_mask)
AllCounty$bivariate_household <- as.factor(AllCounty$bivariate_household)
AllCounty$bivariate_over_65 <- as.factor(AllCounty$bivariate_over_65)
AllCounty$bivariate_pop_density <- as.factor(AllCounty$bivariate_pop_density)
AllCounty$bivariate_worked_home <- as.factor(AllCounty$bivariate_worked_home)
AllCounty$bivariate_non_white <- as.factor(AllCounty$bivariate_non_white)
AllCounty$bivariate_mask_always <- as.factor(AllCounty$bivariate_mask_always)
AllCounty$bivariate_mask_mostly <- as.factor(AllCounty$bivariate_mask_mostly)

Testing out with basic map

AllCounty %>% ggplot(aes(x = long, y = lat, group = group, fill = x)) + 
  geom_polygon(color = "NA") +
  theme(panel.grid.major = element_blank(), 
        panel.background = element_blank(),
        axis.title = element_blank(), 
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  coord_fixed(1.3) 

bivarate map of cases and poverty

#bivarate map of cases and poverty 
AllCounty$bivariate <- as.factor(AllCounty$bivariate)
map_poverty <- AllCounty %>% ggplot(aes(x = long, y = lat, group = group, fill = bivariate)) + 
  geom_polygon(color = "NA") +
  theme(legend.position = "None",
    panel.grid.major = element_blank(), 
        panel.background = element_blank(),
        axis.title = element_blank(), 
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  coord_fixed(1.3) +
  labs(
    title = "Bivariate choropleth map of COVID-19 rates and poverty in the US",
    subtitle = "Sources: US Census and NYTimes on 11/30/20")

cbp1 <- c("#E8E8E8", "#DFB0D6", "#BE64AC", "#ACE4E4", "#A5ADD3", "#8C62AA", "#5AC8C8", "#5698B9", "#3B4994")

map_poverty <-map_poverty + scale_fill_manual(values = cbp1)
#map_poverty 


#legend for map of cases and poverty
melt(matrix(1:9,nrow=3))
##   Var1 Var2 value
## 1    1    1     1
## 2    2    1     2
## 3    3    1     3
## 4    1    2     4
## 5    2    2     5
## 6    3    2     6
## 7    1    3     7
## 8    2    3     8
## 9    3    3     9
legendGoal=melt(matrix(1:9,nrow=3))
test<-ggplot(legendGoal, aes(Var2,Var1,fill = as.factor(value)))+ geom_tile()
test<- test + scale_fill_manual(name="",values=cbp1)
lg_poverty<-test + theme(legend.position="none", axis.text=element_blank(),line=element_blank()) + xlab("Increasing Poverty-->") + ylab("Increasing COVID rates -->")
#lg_poverty 

#map plus legend for cases and poverty 
ggdraw() +
  draw_plot(map_poverty, 0, 0, 1, 1) +
  draw_plot(lg_poverty, 0.05, 0.075, 0.25, 0.25)

bivarate map of cases and mask use

#bivarate map of cases and mask use 
map_mask <- AllCounty %>% ggplot(aes(x = long, y = lat, group = group, fill = bivariate_mask)) + 
  geom_polygon(color = "NA") +
  theme(legend.position = "None",
        panel.grid.major = element_blank(), 
        panel.background = element_blank(),
        axis.title = element_blank(), 
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  coord_fixed(1.3) +
  labs(
    title = "Bivariate choropleth map of COVID-19 rates and mask use in the US",
    subtitle = "Sources: US Census and NYTimes on 11/30/20")

cbp1 <- c("#E8E8E8", "#DFB0D6", "#BE64AC", "#ACE4E4", "#A5ADD3", "#8C62AA", "#5AC8C8", "#5698B9", "#3B4994")

map_mask <-map_mask + scale_fill_manual(values = cbp1)
#map_mask 


#legend for map of cases and mask use
melt(matrix(1:9,nrow=3))
##   Var1 Var2 value
## 1    1    1     1
## 2    2    1     2
## 3    3    1     3
## 4    1    2     4
## 5    2    2     5
## 6    3    2     6
## 7    1    3     7
## 8    2    3     8
## 9    3    3     9
legendGoal=melt(matrix(1:9,nrow=3))
test<-ggplot(legendGoal, aes(Var2,Var1,fill = as.factor(value)))+ geom_tile()
test<- test + scale_fill_manual(name="",values=cbp1)
lg_mask<-test + theme(legend.position="none", axis.text=element_blank(),line=element_blank()) + xlab("Increasing mask use-->") + ylab("Increasing COVID rates -->")
#lg_mask 


#map plus legend for cases and mask use  
ggdraw() +
  draw_plot(map_mask, 0, 0, 1, 1) +
  draw_plot(lg_mask, 0.05, 0.075, 0.25, 0.25)

bivarate map of cases and household size

#bivarate map of cases and household size 
map_household <- AllCounty %>% ggplot(aes(x = long, y = lat, group = group, fill = bivariate_household)) + 
  geom_polygon(color = "NA") +
  theme(legend.position = "None",
        panel.grid.major = element_blank(), 
        panel.background = element_blank(),
        axis.title = element_blank(), 
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  coord_fixed(1.3) +
  labs(
    title = "Bivariate choropleth map of COVID-19 rates and household size in the US",
    subtitle = "Sources: US Census and NYTimes on 11/30/20")
cbp1 <- c("#E8E8E8", "#DFB0D6", "#BE64AC", "#ACE4E4", "#A5ADD3", "#8C62AA", "#5AC8C8", "#5698B9", "#3B4994")

map_household <-map_household + scale_fill_manual(values = cbp1)
#map_household 


#legend for map of cases and household size 
library(reshape2)
melt(matrix(1:9,nrow=3))
##   Var1 Var2 value
## 1    1    1     1
## 2    2    1     2
## 3    3    1     3
## 4    1    2     4
## 5    2    2     5
## 6    3    2     6
## 7    1    3     7
## 8    2    3     8
## 9    3    3     9
legendGoal=melt(matrix(1:9,nrow=3))
test<-ggplot(legendGoal, aes(Var2,Var1,fill = as.factor(value)))+ geom_tile()
test<- test + scale_fill_manual(name="",values=cbp1)
lg_household<-test + theme(legend.position="none", axis.text=element_blank(),line=element_blank()) + xlab("Increasing household size-->") + ylab("Increasing COVID rates -->")
#lg_household 

#map plus legend for cases and household size 
ggdraw() +
  draw_plot(map_household, 0, 0, 1, 1) +
  draw_plot(lg_household, 0.05, 0.075, 0.25, 0.25)

bivarate map of cases and % over 65

#bivarate map of cases and % over 65
map_65 <- AllCounty %>% ggplot(aes(x = long, y = lat, group = group, fill = bivariate_over_65)) + 
  geom_polygon(color = "NA") +
  theme(legend.position = "None",
        panel.grid.major = element_blank(), 
        panel.background = element_blank(),
        axis.title = element_blank(), 
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  coord_fixed(1.3) +
  labs(
    title = "Bivariate choropleth map of COVID-19 rates and % of population over 65 years in the US",
    subtitle = "Sources: US Census and NYTimes on 11/30/20")

cbp1 <- c("#E8E8E8", "#DFB0D6", "#BE64AC", "#ACE4E4", "#A5ADD3", "#8C62AA", "#5AC8C8", "#5698B9", "#3B4994")

map_65 <-map_65 + scale_fill_manual(values = cbp1)
#map_65 


#legend for map of cases and % over 65 
melt(matrix(1:9,nrow=3))
##   Var1 Var2 value
## 1    1    1     1
## 2    2    1     2
## 3    3    1     3
## 4    1    2     4
## 5    2    2     5
## 6    3    2     6
## 7    1    3     7
## 8    2    3     8
## 9    3    3     9
legendGoal=melt(matrix(1:9,nrow=3))
test<-ggplot(legendGoal, aes(Var2,Var1,fill = as.factor(value)))+ geom_tile()
test<- test + scale_fill_manual(name="",values=cbp1)
lg_65<-test + theme(legend.position="none", axis.text=element_blank(),line=element_blank()) + xlab("Increasing percent over 65-->") + ylab("Increasing COVID rates -->")
#lg_65 

#map plus legend for cases and % over 65
ggdraw() +
  draw_plot(map_65, 0, 0, 1, 1) +
  draw_plot(lg_65, 0.05, 0.075, 0.25, 0.25)

bivarate map of cases and pop density

#bivarate map of cases and pop density 
map_density <- AllCounty %>% ggplot(aes(x = long, y = lat, group = group, fill = bivariate_pop_density)) + 
  geom_polygon(color = "NA") +
  theme(legend.position = "None",
        panel.grid.major = element_blank(), 
        panel.background = element_blank(),
        axis.title = element_blank(), 
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  coord_fixed(1.3) +
  labs(
    title = "Bivariate choropleth map of COVID-19 rates and population density in the US",
    subtitle = "Sources: US Census and NYTimes on 11/30/20")

cbp1 <- c("#E8E8E8", "#DFB0D6", "#BE64AC", "#ACE4E4", "#A5ADD3", "#8C62AA", "#5AC8C8", "#5698B9", "#3B4994")

map_density <-map_density + scale_fill_manual(values = cbp1)
#map_density 


#legend for map of cases and population density 
melt(matrix(1:9,nrow=3))
##   Var1 Var2 value
## 1    1    1     1
## 2    2    1     2
## 3    3    1     3
## 4    1    2     4
## 5    2    2     5
## 6    3    2     6
## 7    1    3     7
## 8    2    3     8
## 9    3    3     9
legendGoal=melt(matrix(1:9,nrow=3))
test<-ggplot(legendGoal, aes(Var2,Var1,fill = as.factor(value)))+ geom_tile()
test<- test + scale_fill_manual(name="",values=cbp1)
lg_density<-test + theme(legend.position="none", axis.text=element_blank(),line=element_blank()) + xlab("Increasing population density-->") + ylab("Increasing COVID rates -->")
#lg_density 

#map plus legend for cases and population density 
ggdraw() +
  draw_plot(map_density, 0, 0, 1, 1) +
  draw_plot(lg_density, 0.05, 0.075, 0.25, 0.25)

bivarate map of cases and worked from home

#bivarate map of cases and worked from home 
map_home <- AllCounty %>% ggplot(aes(x = long, y = lat, group = group, fill = bivariate_worked_home)) + 
  geom_polygon(color = "NA") +
  theme(legend.position = "None",
        panel.grid.major = element_blank(), 
        panel.background = element_blank(),
        axis.title = element_blank(), 
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  coord_fixed(1.3) +
  labs(
    title = "Bivariate choropleth map of COVID-19 rates and % working from home (WFH) in the US",
    subtitle = "Sources: US Census and NYTimes on 11/30/20")
  

cbp1 <- c("#E8E8E8", "#DFB0D6", "#BE64AC", "#ACE4E4", "#A5ADD3", "#8C62AA", "#5AC8C8", "#5698B9", "#3B4994")

map_home <-map_home + scale_fill_manual(values = cbp1)
#map_home 


#legend for map of cases and worked from home
melt(matrix(1:9,nrow=3))
##   Var1 Var2 value
## 1    1    1     1
## 2    2    1     2
## 3    3    1     3
## 4    1    2     4
## 5    2    2     5
## 6    3    2     6
## 7    1    3     7
## 8    2    3     8
## 9    3    3     9
legendGoal=melt(matrix(1:9,nrow=3))
test<-ggplot(legendGoal, aes(Var2,Var1,fill = as.factor(value)))+ geom_tile()
test<- test + scale_fill_manual(name="",values=cbp1)
lg_home<-test + theme(legend.position="none", axis.text=element_blank(),line=element_blank()) + xlab("Increasing WFH-->") + ylab("Increasing COVID rates -->")
#lg_home 

#map plus legend for cases and worked from home 
ggdraw() +
  draw_plot(map_home, 0, 0, 1, 1) +
  draw_plot(lg_home, 0.05, 0.075, 0.25, 0.25)

bivarate map of cases and race

#bivarate map of cases and race 
map_race <- AllCounty %>% ggplot(aes(x = long, y = lat, group = group, fill = bivariate_non_white)) + 
  geom_polygon(color = "NA") +
  theme(legend.position = "None",
        panel.grid.major = element_blank(), 
        panel.background = element_blank(),
        axis.title = element_blank(), 
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  coord_fixed(1.3) +
  labs(
    title = "Bivariate choropleth map of COVID-19 rates and race in the US",
    subtitle = "Sources: US Census and NYTimes on 11/30/20")

cbp1 <- c("#E8E8E8", "#DFB0D6", "#BE64AC", "#ACE4E4", "#A5ADD3", "#8C62AA", "#5AC8C8", "#5698B9", "#3B4994")

map_race <-map_race + scale_fill_manual(values = cbp1)
#map_race 


#legend for map of cases and race
melt(matrix(1:9,nrow=3))
##   Var1 Var2 value
## 1    1    1     1
## 2    2    1     2
## 3    3    1     3
## 4    1    2     4
## 5    2    2     5
## 6    3    2     6
## 7    1    3     7
## 8    2    3     8
## 9    3    3     9
legendGoal=melt(matrix(1:9,nrow=3))
test<-ggplot(legendGoal, aes(Var2,Var1,fill = as.factor(value)))+ geom_tile()
test<- test + scale_fill_manual(name="",values=cbp1)
lg_race<-test + theme(legend.position="none", axis.text=element_blank(),line=element_blank()) + xlab("Increasing Prop non-White-->") + ylab("Increasing COVID rates -->")
#lg_race

#map plus legend for cases and race 
ggdraw() +
  draw_plot(map_race, 0, 0, 1, 1) +
  draw_plot(lg_race, 0.05, 0.075, 0.25, 0.25)

bivarate map of cases and always mask use

#bivarate map of cases and always mask use 
map_mask_always <- AllCounty %>% ggplot(aes(x = long, y = lat, group = group, fill = bivariate_mask_always)) + 
  geom_polygon(color = "NA") +
  theme(legend.position = "None",
        panel.grid.major = element_blank(), 
        panel.background = element_blank(),
        axis.title = element_blank(), 
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  coord_fixed(1.3) +
  labs(
    title = "Bivariate choropleth map of COVID-19 rates and mask use in the US",
    subtitle = "Sources: US Census and NYTimes on 11/30/20")

cbp1 <- c("#E8E8E8", "#DFB0D6", "#BE64AC", "#ACE4E4", "#A5ADD3", "#8C62AA", "#5AC8C8", "#5698B9", "#3B4994")
map_mask_always <-map_mask_always + scale_fill_manual(values = cbp1)
map_mask_always

#legend for map of cases and always mask use
melt(matrix(1:9,nrow=3))
##   Var1 Var2 value
## 1    1    1     1
## 2    2    1     2
## 3    3    1     3
## 4    1    2     4
## 5    2    2     5
## 6    3    2     6
## 7    1    3     7
## 8    2    3     8
## 9    3    3     9
legendGoal=melt(matrix(1:9,nrow=3))
test<-ggplot(legendGoal, aes(Var2,Var1,fill = as.factor(value)))+ geom_tile()
test<- test + scale_fill_manual(name="",values=cbp1)
lg_mask_always<-test + theme(legend.position="none", axis.text=element_blank(),line=element_blank()) + xlab("Increasing mask use-->") + ylab("Increasing COVID rates -->")


#map plus legend for cases and always mask use 
ggdraw() +
  draw_plot(map_mask_always, 0, 0, 1, 1) +
  draw_plot(lg_mask_always, 0.05, 0.075, 0.25, 0.25)

bivarate map of cases and mostly mask use

#bivarate map of cases and mostly mask use 
map_mask_mostly <- AllCounty %>% ggplot(aes(x = long, y = lat, group = group, fill = bivariate_mask_mostly)) + 
  geom_polygon(color = "NA") +
  theme(legend.position = "None",
        panel.grid.major = element_blank(), 
        panel.background = element_blank(),
        axis.title = element_blank(), 
        axis.text = element_blank(),
        axis.ticks = element_blank()) +
  coord_fixed(1.3) +
  labs(
    title = "Bivariate choropleth map of COVID-19 rates and mask use in the US",
    subtitle = "Sources: US Census and NYTimes on 11/30/20")

cbp1 <- c("#E8E8E8", "#DFB0D6", "#BE64AC", "#ACE4E4", "#A5ADD3", "#8C62AA", "#5AC8C8", "#5698B9", "#3B4994")

map_mask_mostly <-map_mask_mostly + scale_fill_manual(values = cbp1)
#map_mask_mostly


#legend for map of cases and mostly mask use
melt(matrix(1:9,nrow=3))
##   Var1 Var2 value
## 1    1    1     1
## 2    2    1     2
## 3    3    1     3
## 4    1    2     4
## 5    2    2     5
## 6    3    2     6
## 7    1    3     7
## 8    2    3     8
## 9    3    3     9
legendGoal=melt(matrix(1:9,nrow=3))
test<-ggplot(legendGoal, aes(Var2,Var1,fill = as.factor(value)))+ geom_tile()
test<- test + scale_fill_manual(name="",values=cbp1)
lg_mask_mostly<-test + theme(legend.position="none", axis.text=element_blank(),line=element_blank()) + xlab("Increasing mask use-->") + ylab("Increasing COVID rates -->")
#lg_race

#map plus legend for cases and mostly mask use 
ggdraw() +
  draw_plot(map_mask_mostly, 0, 0, 1, 1) +
  draw_plot(lg_mask_mostly, 0.05, 0.075, 0.25, 0.25)